	

	* Start from an empty session and load the household-level file.
	clear all
	*cd 

	set more off
	use data.dta, clear


	*Calculation for text: "The team was able to contact 1,346 heads of households. In contrast to the contacted households, the uncontacted households used, on average, 13.83% less water per month in the pre-treatment period."
	* surveyed = 1 when field1==1, 0 for every other field1 value except 9 (missing field1 included),
	* and is left missing when field1==9.
	generate surveyed = (field1 == 1) if field1 ~= 9
	label variable surveyed "people surveyed from people in field lists"

	* Two-group comparison of preconsum_201405_201504 by contact status.
	ttest preconsum_201405_201504, by(surveyed)

	
		
	*Disadoption analysis
	***********************
	* Uninstall dates for everyone, then split by visit timing. The two timing
	* groups use the same conditions as the may15 indicator built further down:
	*   - periodfield1 6 or 7, outside codcom 25
	*   - periodfield1 5, or any non-missing periodfield1 inside codcom 25
	tab uninstalldate
	tab uninstalldate  if (periodfield1==6 | periodfield1==7) & codcom~=25

	tab uninstalldate if periodfield1==5 | (periodfield1~=. & codcom==25)
	
	
	*-- without bonus group
	* Note: sorteo~=1 is also true when sorteo is missing, and this section runs
	* before the sample is restricted to non-missing sorteo.
	tab uninstalldate  if sorteo~=1 & (periodfield1==6 | periodfield1==7) & codcom~=25

	* "&" binds tighter than "|", so the timing condition must be parenthesised;
	* without the parentheses the bonus-group exclusion applied only to the
	* periodfield1==5 arm and codcom 25 households with sorteo==1 were included.
	tab uninstalldate if sorteo~=1 & (periodfield1==5 | (periodfield1~=. & codcom==25))
 

	
	*Restrict to the experimental sample (rows with a non-missing sorteo)
	keep if sorteo~=.


	*Missing-value flags: miss_YYYYMM = 1 when the reading m3YYYYMM is missing, 0 otherwise.
	*The year loop is innermost so variables are created in the order 2013, 2014, 2015 within each month.
	foreach mm in 01 02 03 04 05 06 07 08 09 10 11 12 {
		foreach yr in 2013 2014 2015 {
			gen miss_`yr'`mm' = missing(m3`yr'`mm')
		}
	}

	*For 2016 flags are created for months 01-09 only
	foreach mm in 01 02 03 04 05 06 07 08 09 {
		gen miss_2016`mm' = missing(m32016`mm')
	}

	*Monthly readings from 201405 up to midline (201510), and from 201511 to 201609
	local through_midline m3201405 m3201406 m3201407 m3201408 m3201409 m3201410 m3201411 m3201412 m3201501 m3201502 m3201503 m3201504 m3201505 m3201506 m3201507 m3201508 m3201509 m3201510
	local after_midline m3201511 m3201512 m3201601 m3201602 m3201603 m3201604 m3201605 m3201606 m3201607 m3201608 m3201609

	*Number of missing readings over the whole period 201405-201609 (29 months)
	egen miss_all = rowmiss(`through_midline' `after_midline')
	label variable miss_all "number of missing values in all the periods since 201405"

	*Number of missing readings from 201405 through 201510 (18 months)
	egen miss_midline = rowmiss(`through_midline')
	label variable miss_midline "number of missing values since 201405 until midline"



	******************************************************************************************************************************************************
	*Addtional variables
	******************************************************************************************************************************************************

 
	
	*2015 "all technologies" indicator adjusted with 2016 audit data:
	*where a15_fieldtreated==0, the 2016 value is copied whenever it is 0 or 1.
	gen a15_kall_i16=a15_kall
	replace a15_kall_i16=a16_kall if inlist(a16_kall,0,1) & a15_fieldtreated==0


	*2016 "all technologies" indicator adjusted with 2015 audit data:
	*where a16_fieldtreated==0 and a15_kall is observed (0 or 1), the value is set to 0.
	*NOTE(review): unlike the 2015 rule above, a15_kall==1 maps to 0 rather than 1.
	*Presumably a deliberate conservative assumption - confirm against Appendix A7.
	gen a16_kall_i15=a16_kall
	replace a16_kall_i15=0 if inlist(a15_kall,0,1) & a16_fieldtreated==0


	*Same two rules applied to the "at least one technology" indicators.
	gen a15_katleast1_i16=a15_katleast1
	replace a15_katleast1_i16=a16_katleast1 if inlist(a16_katleast1,0,1) & a15_fieldtreated==0

	gen a16_katleast1_i15=a16_katleast1
	replace a16_katleast1_i15=0 if inlist(a15_katleast1,0,1) & a16_fieldtreated==0
	
	
	
	* Visit timing indicator
	***************************
	* may15 = 1 when periodfield1==5, or when periodfield1 is non-missing and codcom==25;
	* otherwise 0 when periodfield1 is 6 or 7; missing for everything else.
	gen may15=1 if periodfield1==5 | (periodfield1~=. & codcom==25)
	replace may15=0 if inlist(periodfield1,6,7) & may15==.
	label variable may15 "=1 if hh was visited in may 2015"

	
	* Balanced-panel indicator for monthly consumption
	************************************************
	* complete_201405_201609 = 1 when none of the 29 monthly readings
	* m3201405 ... m3201609 equals ".", and 0 otherwise.
	*
	* Written with the default carriage-return delimiter on purpose: under
	* "#delimit ;" a comment that starts with "*" only ends at the next ";",
	* so the comment placed before the tabulate command swallowed it and the
	* tabulation never ran.

	gen complete_201405_201609=1
	foreach reading of varlist m3201405 m3201406 m3201407 m3201408 m3201409 m3201410 m3201411 m3201412 m3201501 m3201502 m3201503 m3201504 m3201505 m3201506 m3201507 m3201508 m3201509 m3201510 m3201511 m3201512 m3201601 m3201602 m3201603 m3201604 m3201605 m3201606 m3201607 m3201608 m3201609 {
		* same comparison as the original "~=." chain: only "." marks the row incomplete
		replace complete_201405_201609=0 if `reading'==.
	}
	label variable complete_201405_201609 "=1 if no missing value in each month of period 201405_201609 "

	*Calculation for text: "The panel is unbalanced, with 4.7% of the sample having some missing observations in the monthly water consumption. "
	tab complete_201405_201609

* Households recorded without the technologies in 2015 but with them in 2016 ("retakers", 15 per the
* original note) are treated as 2016 audit mistakes: the 2015 disadoption is believed, so their
* 2016 indicator is forced to 0. Every other value is carried over unchanged.
generate a16_kall_m = cond(a15_kall==0 & a16_kall==1, 0, a16_kall)

* Same correction on the imputed indicators.
generate a16_kall_i15_m = cond(a15_kall_i16==0 & a16_kall_i15==1, 0, a16_kall_i15)

generate a16_katleast1_i15_m = cond(a15_katleast1_i16==0 & a16_katleast1_i15==1, 0, a16_katleast1_i15)

* Household-level (wide) file used by the sections that follow.
save data_wide.dta, replace


******************************************************************************************************************************************************
* Table A1.
******************************************************************************************************************************************************

use data_wide.dta, clear

* Sample for Table A1: the two technology arms (sorteo 1 = tech + bonus, 2 = only tech).
local tech_arms "sorteo==1 | sorteo==2"

* Frequency tables of shower / kitchen / bath; the frequency vectors are kept as m1, m2, m3.
local k = 0
foreach fixture in shower kitchen bath {
	local ++k
	local tag : word `k' of sh kit bath
	eststo `tag': quietly estpost tabulate `fixture' if `tech_arms'
	matrix m`k' = e(b)
}

* Frequency tables of numshower / numkitchen / numbath. isr is the weighted count
* (column 2 x 1 + column 3 x 2) of each num* table divided by the same quantity from
* the matching table above.
* NOTE(review): this assumes columns 2 and 3 of e(b) are the categories 1 and 2 - confirm.
local k = 0
foreach fixture in shower kitchen bath {
	local ++k
	local tag : word `k' of ish ikit ibath
	eststo `tag': quietly estpost tabulate num`fixture' if `tech_arms'
	estadd scalar isr = (e(b)[1,2]*1+e(b)[1,3]*2)/(m`k'[1,2]*1+m`k'[1,3]*2)
}

esttab sh kit bath ish ikit ibath using tableA1.rtf, ///
	cells("b(fmt(2))") label nodepvar varwidth(35) modelwidth(6) replace ///
	title (Table.Installations) stats(isr)




***********************************************************************************************************************************************************************
* Attrition
***********************************************************************************************************************************************************************	
* Reload the household-level file saved earlier in this script.
use data_wide.dta, clear

	*Calculation for text: "The panel is unbalanced, with 4.7% of the sample having some missing observations in the monthly water consumption. "
	* attrition_all_since201405 is not created in this script; presumably it ships with data.dta - verify.
	tab attrition_all_since201405  //same as variable complete_201405_201609//
	
	
***********************************************************************************************************************************************************************	
*Figure 4. Disadoption rates at midline (2015) and endline (2016). 
***********************************************************************************************************************************************************************
* "Figure: Patterns of disadoption"
*-------------
*Since we have retakers (14), we consider that if households disadopt at midline, they do not have the tech at endline.
* NOTE(review): the retaker count here (14) differs from the 15 quoted where a16_kall_m is built - confirm which is right.
* Cross-tab of the imputed and retaker-corrected 2016 indicator against the imputed 2015 indicator.
* The display lines below use hand-typed counts, presumably copied from this table;
* they must be updated manually if the data change.
tab a16_kall_i15_m a15_kall_i16
display 292/842 // late disadopters
display 394/842 // perfect compliers
display  156/842 // early disadopters
*display .35+0.47+0.18


*By groups
*Only tech
tab a16_kall_i15_m a15_kall_i16 if sorteo==2
display 138/416 // late disadopters
display 173/416 // perfect compliers
display  105/416 // early disadopters


*Tech + Bonus
tab a16_kall_i15_m a15_kall_i16 if sorteo==1
display 154/426 // late disadopters
display 221/426 // perfect compliers
display  51/426 // early disadopters


/* *Calculation for text: Footnote 9 in section F. Disadoption and behavioral responses
"The team was unable to audit all treated homes. For the values reported Fig. 4, we impute the missing audit status (see Appendix A7). Considering only the values from homes observed in both audits, 50.6\% kept all technologies until endline, 36.6\% disadopted at least one fixture between midline and endline, and 12.8\% disadopted at least one fixture before midline." */

* Same breakdown without imputation (a16_kall_m and a15_kall).
* By arithmetic: 263/718 = 36.6%, 363/718 = 50.6%, 92/718 = 12.8%, matching the footnote above.
tab a16_kall_m 
tab a15_kall
tab a16_kall_m a15_kall
display 263/718
display 363/718
display 92/718

*Calculation for text: "The field teams were unable to contact and enter the homes of every treated household to do the audit: 10.9% of the households were unaudited in 2015 , 3.5% were unaudited in 2016 , and 2.5% were unaudited in both ."
**********************************************************************************************************************************************************************

* Audit status in 2015 by audit status in 2016, restricted to tech==1.
* By arithmetic: 22/864 = 2.5%, 94/864 = 10.9%, 30/864 = 3.5% (hand-typed counts, as above).
tab a15_field a16_field if tech==1 
display 22/864
display 94/864
display 30/864

**********************************************************************************************************************************************************************
*Calculation for text: "1.If the household was observed with the technology in 2016 ..."

* Technology status among households missing one of the two audits.
tab a16_kall_m if a15_field==0 & a16_field==1 & tech==1
tab a16_kall if a15_field==0
tab a15_kall if a16_field==0


**********************************************************************************************************************************************************************
*Calculation for text: Note of Figure 4: "“Disadoption” means the household uninstalled one or more of the installed fixtures. Only 8.1\% of households uninstalled all fixtures by midline, but 23.5\% of households uninstalled all fixtures by endline."
* Distribution of the imputed "at least one technology" indicators at midline and endline.
tab a15_katleast1_i16
tab a16_katleast1_i15_m

***********************************************************************************************************************************************************************	
*Testing balance 
***********************************************************************************************************************************************************************
	
* Table 1. Summary Statistics by Treatment Condition

use data_wide.dta, clear

* Variables summarised in Table 1
local balance_vars members shower kitchen bath owner yearsinhome minwage sschool assembly preconsum_since201405

* Mean and SD for treat==1, treat==0 and the full sample
eststo clear
eststo treated: quietly estpost summarize `balance_vars' if treat==1
eststo control: quietly estpost summarize `balance_vars' if treat==0
eststo all: quietly estpost summarize `balance_vars'

esttab treated control all using table1_balance.rtf, ///
	cells("mean(fmt(2)) sd(fmt(2))") label nodepvar varwidth(35) modelwidth(6) replace ///
	title (Table 1. Summary statistics by treatment condition)


******************************************************************************************************************************************************
	* Table 2, column 7. ITT with cross-sectional data (for robustness) 
******************************************************************************************************************************************************	
	use data_wide.dta, clear

	* Monthly readings 201405-201504, newest first. The global is defined here but not
	* referenced again in the visible part of this script.
	global preconsum052014 m3201504 m3201503 m3201502 m3201501 m3201412 m3201411 m3201410 m3201409 m3201408 m3201407 m3201406 m3201405

	eststo clear

	* Mean of the outcome for treat==0, used to express the estimate as a share.
	summarize posconsum_all2016 if treat==0
	scalar m=r(mean)

	* OLS with heteroskedasticity-robust standard errors, stored as csitt (reused by the Table 2 export).
	eststo csitt: regress posconsum_all2016 treat preconsum_since201405 i.codcom i.interviewer, vce(robust)

	* cs = first coefficient (treat) divided by the treat==0 mean; attached to the stored estimates.
	matrix b=e(b)
	scalar cs=b[1,1]/m
	estadd scalar cs

	esttab using tableA3_cs-itt.rtf , ///
		indicate("community FE = *codcom*" "interviewer FE = *interviewer*") drop(preconsum_since201405) ///
		stats(cs N, fmt(4 0) labels("Effect in % of control group water consumption" "Observations")) ///
		title(Table 2 col7. ITT using Cross-Sectional Data) b(2) ci(2) replace label varwidth(35) modelwidth(10) star(* 0.10 ** 0.05 *** 0.01) ///
		addnote("Estimation includes average consumption in the pre-treatment period, community and interviewer dummies. In brackets are 95% confidence intervals, constructed from robust standard error estimates. ")


	*Section III b Estimand and Estimator. 
	*Calculation for text: "With less than 1\% non-compliance at installation, we believe..."
	tab tech
	
******************************************************************************************************************************************************
******************************************************************************************************************************************************
	* Building panel structure and interaction variables
******************************************************************************************************************************************************	drop if treat==.
******************************************************************************************************************************************************

	use data_wide.dta, clear

	* Wide to long: one row per household (cid) and month code, stacking the m3YYYYMM columns into m3.
	reshape long m3, i(cid) j(month)


	*Post-installation indicator. The first post month depends on when the installation team visited:
	*  - periodfield1 6 or 7: from 201507 on
	*  - periodfield1 5, or any non-missing periodfield1 in codcom 25: from 201506 on
	*Rows outside these cases are 0 when sorteo is non-missing and missing otherwise.
	gen tec=0 if sorteo~=.
	replace tec=1 if (month>=201507 & inlist(periodfield1,6,7)) ///
		| (month>=201506 & (periodfield1==5 | (periodfield1~=. & codcom==25)))
	label variable tec "post"


	*Treatment x post interaction
	gen tectreat=treat*tec
	label variable tectreat "receive technology * post"
	
	

	* Non-missing readings per household over every month present after the reshape
	**************************************
	bysort cid: egen obscid_201201=count(m3)
	***62% have all months (57) of observations, 30% have only 45 months (a year less)
	***If we consider only period from 201405, 95% have all the months(29).


	*Monthly date variable: split the YYYYMM code into year (ano) and month (mes) via a string
	*round trip, then build a %tm date (fecha). month itself is converted back to numeric.
	tostring month, replace
	gen ano=substr(month,1,4)
	gen mes=substr(month,5,2)
	destring mes ano, replace
	gen fecha=ym(ano,mes)
	format fecha %tm
	destring month, replace


	*DROPPING OBSERVATIONS
	***************************
	*Only months from 201301 onwards are kept
	keep if month>=201301


	* Non-missing readings per household from 201301 on
	bysort cid: egen obscid_201301=count(m3)


	* NOTE(review): the panel time variable is the YYYYMM code, not the %tm date fecha, so
	* consecutive Decembers and Januaries are 89 units apart. No time-series operators are used
	* in this script; switch to fecha before using L./F. operators.
	xtset cid month


	* Non-missing readings per household within sub-periods (missing outside each window)
	bysort cid: egen obscid_201405=count(m3) if month>201404
	bysort cid: egen obscid_201502=count(m3) if month>201501
	bysort cid: egen obscid_201301_201509=count(m3) if month<201510
	bysort cid: egen obscid_201405_201509=count(m3) if month>201404 & month<201510
	bysort cid: egen obscid_201502_201509=count(m3) if month>201501 & month<201510
	
	
	label variable m3 "m3"
	
	*For monthly analysis
	* start_tech = 1 in a single month per treated household: 201505 when periodfield1==5 or
	* codcom==25, and 201506 when periodfield1 is 6 or 7 outside codcom 25. It is 0 in every
	* other row with non-missing treat, and missing otherwise.
	gen start_tech=0 if treat~=.
	replace start_tech=1 if treat==1 & month==201505 & (periodfield1==5 | codcom==25)
	replace start_tech=1 if treat==1 & month==201506 & (periodfield1==6 | periodfield1==7 ) & codcom~=25

	* Leads and lags are positional (_n +/- k), so rows must be in month order within each
	* household. "bysort cid (month):" enforces that order on every call instead of relying
	* on an earlier sort surviving. Rows shifted past the first/last month of a household
	* are set to 0.
	* NOTE(review): positional shifts equal calendar shifts only if each household has one
	* row per consecutive month - presumably guaranteed by the reshape; confirm.
	sort cid month
foreach x in 1 2 3 4 5 6 7 8 9 10 11 12 13{

	bysort cid (month): gen lag`x'tech = start_tech[_n-`x']
	bysort cid (month): gen lead`x'tech = start_tech[_n+`x']
	replace lag`x'tech=0 if lag`x'tech==.
	replace lead`x'tech=0 if lead`x'tech==.

}

	* Lags 14-16 have no matching leads.
foreach x in 14 15 16{

	bysort cid (month): gen lag`x'tech = start_tech[_n-`x']
	replace lag`x'tech=0 if lag`x'tech==.

}

	
	* Household-month (long) file used by all panel estimations below.
	save data_long.dta, replace


******************************************************************************************************************************************************
	
	*Finding the right panel data model
******************************************************************************************************************************************************
	
	/* Hausman test
	******************
	Chooses between fixed and random effects. Under the null the random-effects
	model is preferred; the alternative is fixed effects (see Green, 2008, chapter 9).
	The test asks whether the unit-specific errors (ui) are correlated with the
	regressors; the null (RE) is that they are not.*/

	use data_long.dta, clear
	*Sample starts in May 2014, one year before treatment
	keep if month>201404

	* Common specification for the tests below
	local spec m3 tectreat i.month i.codcom i.interviewer

	xtreg `spec', fe
	estimates store fixed

	xtreg `spec'
	estimates store random

	hausman fixed random

	*We should use random effects


	/* Testing for random effects: Breusch-Pagan Lagrange multiplier (LM)
	***********************************************************************
	The LM test chooses between a random-effects regression and simple OLS.
	Its null is that the variance across entities is zero, i.e. there is no
	significant difference across units (no panel effect). xttest0 must be run
	right after the random-effects model.
	*/

	xtreg `spec'
	xttest0
	* Variance of individual effects is not zero, so random effects is better than OLS.


	*Testing for time-fixed effects
	********************************

	xtreg `spec'
	testparm i.month

	*testparm is a joint test that all the month dummies equal 0; if they do, no time fixed effects are needed.
	*The P-value=0, so time fixed effects are needed.

	*Testing heteroskedasticity
	****************************
	*The null is homoskedasticity (or constant variance). We reject the null and conclude heteroskedasticity.
	*(The test command itself, xttest3, is commented out below.)
	xtreg m3 tectreat tec i.mes i.ano, fe
	*xttest3
	*use robust option
	
	

	
******************************************************************************************************************************************************
******************************************************************************************************************************************************	
	*ESTIMATIONS
******************************************************************************************************************************************************	
******************************************************************************************************************************************************

	*******************
	*******************
	
	*Long-term Analysis
	
	*******************
	*******************
	

	*pre-treatment period 052014-042015
	*----------------------------------*

	*Table 2. ITT 
	*************
	use data_long.dta, clear
	*Sample starts in May 2014, one year before treatment
	keep if month>201404

	* Month, community and interviewer dummies shared by columns 1, 2 and 4
	local dummies i.month i.codcom i.interviewer


	*(Column 1) Pre-treatment period
	*----------
	* per1 = coefficient on treat divided by the treat==0 mean before 201505
	summarize m3 if treat==0 & month<201505
	scalar m=r(mean)
	eststo pre: xtreg m3 treat `dummies' if month<201505, vce(cluster cid)
	matrix b=e(b)
	scalar per1=b[1,1]/m
	estadd scalar per1


	* (Column 5) Simple Dif-Dif
	****************
	* m2 / sd2 = mean and SD of m3 for treat==0 in post rows (tec==1); recomputed before each column
	summarize m3 if treat==0 & tec==1
	scalar m2=r(mean)
	scalar sd2=r(sd)

	eststo difdif: xtreg m3 tectreat tec, fe vce(cluster cid)
	matrix b=e(b)
	scalar per5=b[1,1]/m2
	estadd scalar per5


	*(Column 6) Fixed Effects
	*----------
	summarize m3 if treat==0 & tec==1
	scalar m2=r(mean)
	scalar sd2=r(sd)
	eststo fe: xtreg m3 tectreat i.month, fe vce(cluster cid)
	matrix b=e(b)
	scalar per6=b[1,1]/m2
	estadd scalar per6


	*(Column 2)
	*----------

	* Calculation for text: "In the post-treatment period, the control group consumed on average..."
	summarize m3 if treat==0 & tec==1
	scalar m2=r(mean)
	scalar sd2=r(sd)

	eststo re: xtreg m3 tectreat `dummies', vce(cluster cid)
	matrix b=e(b)
	scalar per2=b[1,1]/m2
	estadd scalar per2


	*Table 2, Column 4. ITT no bonus
	*----------

	* Calculation for text: "In the post-treatment period, the control group consumed on average..."
	summarize m3 if treat==0 & tec==1
	scalar m2=r(mean)
	scalar sd2=r(sd)
	eststo renb: xtreg m3 tectreat `dummies' if sorteo~=1, vce(cluster cid)
	matrix b=e(b)
	scalar per4=b[1,1]/m2
	estadd scalar per4


	* csitt is the cross-sectional estimate stored earlier in this script.
	esttab pre re renb  difdif fe csitt using table2.rtf , ///
		indicate("community FE = *codcom*" "month FE=*month*" "interviewer FE=*interviewer*") compress ///
		stats(per1 per2 per4 per5 per6 cs N, fmt(4 4 4 4 4 4 0) labels( "Treatment effect" "Treatment effect" "Treatment effect" "Treatment effect" "Treatment effect" "Treatment effect" "Observations")) ///
		title(Table 2. Estimated Treatment Effect of Technology Adoption on Water Consumption (m3/month)) ///
		b(2) ci(2) replace label varwidth(15) modelwidth(10) star(* 0.10 ** 0.05 *** 0.01)
	


	
	

	
	*Figure 2. Estimated Treatment Effect Per month 
	*----------------------------------------------*
		use data_long.dta, clear
		keep if month>201404
		eststo clear
		

		* Monthly ITT: 13 leads, the start month and 16 lags of start_tech, plus month,
		* community and interviewer dummies. The 30 lead/lag terms are the first 30 columns of e(b).
		xtreg m3 lead13* lead12* lead11* lead10* lead9* lead8* lead7* lead6* lead5* lead4* lead3* lead2* lead1tech start_tech lag1tech lag2* lag3* lag4* lag5* lag6* lag7* lag8* lag9* lag10* lag11* lag12* lag13* lag14* lag15* lag16* i.month i.codcom i.interviewer, vce(cluster cid)

		matrix table=r(table)

		* Rows 1, 5 and 6 of r(table) for the 30 lead/lag columns (named estimate, lower, upper below).
		matselrc table m_itt, r(1 5 6) c(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)

		* Transposed: one row per lead/lag term, in regressor order
		* (row 1 = lead13, row 13 = lead1, row 14 = start_tech, row 15 = lag1, row 30 = lag16).
		mat mitt=m_itt'


			svmat mitt
			rename mitt1 estimate
			rename  mitt2 lower
			rename  mitt3 upper
			gen yearg=.
			* Months relative to installation, plotted from -12 to 15. Row for month x is x+14,
			* so lead12 -> -12, start_tech -> 0, lag1 -> 1 and lag15 -> 15. The index previously
			* started at row 1, which shifted every estimate by one month (lead13 at -12,
			* lag1 at 2), inconsistent with mitt[15,1] being the first month after installation.
			* lead13 (row 1) and lag16 (row 30) keep a missing yearg and are not plotted.
			forvalues x = -12(+1)15{
				local i=`x'+14
				replace yearg=`x' in `i'
			}


	*Calculation for text:"The estimated effect of the technologies in the first month after installation is a reduction of 2.95 m3, or 12.1%"
	matrix list mitt

	*Section F disadoption. 
	*Calculation for text: "If we make the extreme assumption that these households...", page 24
	*In excel "Disadoption analysis_1" or here:
	* Row 15 of mitt is lag1tech; 18/748 is a hand-typed ratio.
	scalar cace_m1=mitt[15,1]/(1-18/748)

	
	
	*I use the following two tests in the paper
	* Average of lags 1-3 minus average of lags 13-15
	lincom (lag1tech+lag2tech+lag3tech)/3 - (lag13tech+lag14tech+lag15tech)/3 
	
	* Joint test that the coefficients on lags 1-15 are all equal
	testparm lag1tech lag2tech lag3tech lag4tech lag5tech lag6tech lag7tech lag8tech lag9tech lag10tech lag11tech lag12tech lag13tech lag14tech lag15tech  , equal

	
	
	*Figure
	set scheme plotplain	


	* NOTE(review): "grey" does not appear to be a named Stata colour ("gray" is); if it is
	* not recognised the caps fall back to the default colour - confirm the intended look.
	graph twoway (connected estimate yearg),  xtitle("Months before and after installation")   yline(0) xlabel(-12(2)16) ylabel(-6(2)4) || rcap upper lower yearg,  lcolor(grey%30)  legend(label(1 "Estimate") label(2 "95% Confidence Interval")) 
	graph export "Fig2_monthlyitt_rcap.png", replace


	
	
	


		*----------------------------------------
		*-----------------------------------------	
		*Without units that got bonus (sorteo~=1) 
		*----------------------------------------
		*-----------------------------------------

	

		*Figure A2: Per month ITT - without units that got bonus (sorteo==1) 
		***************
		* Same specification as Figure 2, estimated on rows with sorteo~=1.
		xtreg m3 lead13* lead12* lead11* lead10* lead9* lead8* lead7* lead6* lead5* lead4* lead3* lead2* lead1tech start_tech lag1tech lag2* lag3* lag4* lag5* lag6* lag7* lag8* lag9* lag10* lag11* lag12* lag13* lag14* lag15* lag16* i.month i.codcom i.interviewer if sorteo~=1, vce(cluster cid)

		matrix table_nb=r(table)

		matselrc table_nb m_itt_nb, r(1 5 6) c(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30)

		mat mitt_nb=m_itt_nb'


			svmat mitt_nb
			rename mitt_nb1 estimate_nb
			rename  mitt_nb2 lower_nb
			rename  mitt_nb3 upper_nb
			gen yearg_nb=.
			* Same row-to-month mapping as for yearg: month x sits in row x+14.
			forvalues x = -12(+1)15{
				local i=`x'+14
				replace yearg_nb=`x' in `i'
			}
		
		
		*I use the following two tests in the paper
		lincom (lag1tech+lag2tech+lag3tech)/3 - (lag13tech+lag14tech+lag15tech)/3 
		
		testparm lag1tech lag2tech lag3tech lag4tech lag5tech lag6tech lag7tech lag8tech lag9tech lag10tech lag11tech lag12tech lag13tech lag14tech lag15tech  , equal

		
		*Figure - Comparing results with and without units that got bonus (sorteo==1) 
		set scheme plotplain	

		graph twoway  connected estimate_nb yearg_nb  || rcap upper_nb lower_nb yearg_nb|| connected estimate yearg  || rcap upper lower yearg,  xtitle("Months before and after installation")   yline(0) xlabel(-12(2)16) ylabel(-6(2)4) legend(label(3 "Estimate") label(4 "Confidence Interval")  label(1 "Estimate, no bonus") label(2 "Confidence Interval, no bonus"))
		graph export "FigA2_monthlyitt_rcap.png", replace
		
		

	* Appendix A5: Fourth month ITT
	****************************************
	*Calculation for text: "This calculation starts with the estimated monthly ITT for the first four months after installation...."


	*Lead/lag model with lags 1-4 pooled into a single indicator
	gen lag1234tech = (lag1tech==1 | lag2tech==1 | lag3tech==1 | lag4tech==1)

	* lag12tech is spelled out because the wildcard lag12* would also match lag1234tech.
	local pooled_terms lead13* lead12* lead11* lead10* lead9* lead8* lead7* lead6* lead5* lead4* lead3* lead2* lead1tech start_tech lag1234tech lag5* lag6* lag7* lag8* lag9* lag10* lag11* lag12tech lag13* lag14* lag15* lag16*

	* Column 15 of e(b) is lag1234tech (13 leads + start_tech come first).
	xtreg m3 `pooled_terms' i.month i.codcom i.interviewer, vce(cluster cid)
	scalar itt4m=e(b)[1,15]


	* Same model on rows with sorteo~=1
	xtreg m3 `pooled_terms' i.month i.codcom i.interviewer if sorteo~=1, vce(cluster cid)
	scalar itt4m_nb=e(b)[1,15]

		
		
		
		
		


	
**********************************************************************************************************************************************************************
	*Calculation for text:"Households that had at least one failed audit had pre-treatment water use of 22.08 m$^3/$month (SD=17.23) versus 25.43 m$^3/$month (SD=16.32) in fully audited households."
	use data_long.dta, clear
	keep if month>201404

	* Rows of treated households before installation (tec==0)
	local pre_treated "treat==1 & tec==0"

	*Fully audited (audited in both 2015 and 2016)
	summarize m3 if `pre_treated' & a15_field==1 & a16_field==1
	*Households that had at least one failed audit
	summarize m3 if `pre_treated' & (a15_field==0 | a16_field==0)

	
	
	
	
	
	



***************************************************************************************************************************************************************************
*Balance Panel. Table 2 (Column 3) 
***************************************************************************************************************************************************************************
	use data_long.dta, clear
	keep if month>201404
	
	* Balanced panel: households with no missing monthly reading in 201405-201609
	keep if complete_201405_201609==1
	
	
	eststo clear
	*(1) Pre-treatment difference; per1 = coefficient on treat / treat==0 mean before 201505
	sum m3 if treat==0 & month<201505
	scalar m=r(mean)
	eststo pre: xtreg m3 treat i.month i.codcom  i.interviewer if month<201505, vce(cluster cid) 
	mat b=e(b)
	scalar per1=b[1,1]/m
	estadd scalar per1


	
	*(2) ITT; per2 = coefficient on tectreat / mean of m3 for treat==0 in post rows
	sum m3 if treat==0 & tec==1
	scalar m2=r(mean)
	scalar sd2=r(sd)
	eststo all: xtreg m3  tectreat i.month i.codcom i.interviewer,vce(cluster cid)
	mat b=e(b)
	scalar per2=b[1,1]/m2
	estadd scalar per2
	* From here on sd2 holds the coefficient divided by the control-group SD, not the SD itself.
	scalar sd2=b[1,1]/sd2
	
	
	* Confidence interval for the effect expressed as a share of the control mean
	xtreg m3  tectreat i.month i.codcom i.interviewer,vce(cluster cid)
	eststo eq1_nl:nlcom (tectreat:_b[tectreat]/m2 ), post 
	esttab eq1_nl, ci

	*Lincom and nlcom provide the same results
	xtreg m3  tectreat i.month i.codcom i.interviewer,vce(cluster cid)
	lincom _b[tectreat]/m2 


	* One format per statistic: per1 and per2 with 4 decimals, N with none. The previous
	* list fmt(4 4 4 4 4 0) gave N four decimals, unlike the other tables in this script.
	* The trailing "///" was also removed so the command can no longer absorb a following line.
	esttab pre all using table2_balanced_itt.rtf , indicate("community FE = *codcom*" "month FE=*month*" "interviewer FE=*interviewer*") compress stats(per1 per2 N, fmt(4 4 0) labels("Treatment effect" "Treatment effect" "Observations")) title(Table 2 Column 3. Estimated Treatment Effect of Technology Adoption on Water Consumption (m3/month)using a balanced panel) b(2) ci(2) replace label varwidth(25) modelwidth(10) star(* 0.10 ** 0.05 *** 0.01)




***************************************************************************************************************************************************************************
*Quantile Regression for panel (for Welfare Analysis)
***************************************************************************************************************************************************************************


	use data_long.dta, clear
	keep if month>201404

	* Mean of m3 for treat==0 in post rows; used to express each coefficient as a share.
	summarize m3 if treat==0 & tec==1
	local m2=r(mean)

	* Right-hand sides: pooled ITT and the monthly lead/lag model
	local itt_rhs tectreat i.month i.codcom i.interviewer
	local fm_rhs lead13* lead12* lead11* lead10* lead9* lead8* lead7* lead6* lead5* lead4* lead3* lead2* lead1tech start_tech lag1tech lag2* lag3* lag4* lag5* lag6* lag7* lag8* lag9* lag10* lag11* lag12* lag13* lag14* lag15* lag16* i.month i.codcom i.interviewer

	* Each result matrix has one row per quantile (0.05, 0.95) with columns:
	* quantile, coefficient, coefficient / control mean, N.
	* An initial fit at quantile 0.01 precedes every loop; its results are not stored.
	* (A 99-quantile grid, 0.01(0.01)1, was left commented out in the original.)


	 *Extreme values for itt
	 *-----------------------
	xtqreg m3 `itt_rhs', id(cid) quantile(0.01)

	matrix mat_xtqreg = J(2,4,.)
	local i=1
	foreach q in 0.05 0.95 {
		xtqreg m3 `itt_rhs', id(cid) quantile(`q')
		mat list e(b)
		mat b=e(b)
		test tectreat
		* column 1 of e(b) is tectreat
		matrix mat_xtqreg[`i',1]=`q'
		matrix mat_xtqreg[`i',2]=b[1,1]
		matrix mat_xtqreg[`i',3]=b[1,1]/`m2'
		matrix mat_xtqreg[`i',4]=e(N)
		local ++i
	}


	 *Extreme values for itt_fm
	 *-----------------------
	xtqreg m3 `fm_rhs', id(cid) quantile(0.01)

	matrix mat_xtqreg_fm = J(2,4,.)
	local i=1
	foreach q in 0.05 0.95 {
		xtqreg m3 `fm_rhs', id(cid) quantile(`q')
		mat list e(b)
		mat b=e(b)
		* column 15 of e(b) is lag1tech (13 leads + start_tech come first)
		matrix mat_xtqreg_fm[`i',1]=`q'
		matrix mat_xtqreg_fm[`i',2]=b[1,15]
		matrix mat_xtqreg_fm[`i',3]=b[1,15]/`m2'
		matrix mat_xtqreg_fm[`i',4]=e(N)
		local ++i
	}


	* Extreme values for itt_nb (without units that got bonus (sorteo==1) )
	*-----------------------------------------------------------------------
	xtqreg m3 `itt_rhs' if sorteo~=1, id(cid) quantile(0.01)

	matrix mat_xtqregnb = J(2,4,.)
	local i=1
	foreach q in 0.05 0.95 {
		xtqreg m3 `itt_rhs' if sorteo~=1, id(cid) quantile(`q')
		mat list e(b)
		mat b=e(b)
		test tectreat
		matrix mat_xtqregnb[`i',1]=`q'
		matrix mat_xtqregnb[`i',2]=b[1,1]
		matrix mat_xtqregnb[`i',3]=b[1,1]/`m2'
		matrix mat_xtqregnb[`i',4]=e(N)
		local ++i
	}


	* Extreme values for itt_nb_fm (without units that got bonus (sorteo==1) )
	*-----------------------------------------------------------------------
	xtqreg m3 `fm_rhs' if sorteo~=1, id(cid) quantile(0.01)

	matrix mat_xtqregnb_fm = J(2,4,.)
	local i=1
	foreach q in 0.05 0.95 {
		xtqreg m3 `fm_rhs' if sorteo~=1, id(cid) quantile(`q')
		mat list e(b)
		mat b=e(b)
		matrix mat_xtqregnb_fm[`i',1]=`q'
		matrix mat_xtqregnb_fm[`i',2]=b[1,15]
		matrix mat_xtqregnb_fm[`i',3]=b[1,15]/`m2'
		matrix mat_xtqregnb_fm[`i',4]=e(N)
		local ++i
	}

	

***************************************************************************************************************************************************************************
*Table 5. Perceptions of technologies by household type
***************************************************************************************************************************************************************************
  

 use data_wide.dta, clear

* Perception items from the 2015 (a15_) and 2016 (a16_) audits
local perception_vars a15_durationbathlonger a15_durationkitchenlonger a15_durationshowerlonger a15_flowbath a15_flowkitchen a15_flowshower a16_durationbathlonger a16_durationkitchenlonger a16_durationshowerlonger a16_flowbath a16_flowkitchen a16_flowshower

* Means by household type, using the imputed and retaker-corrected indicators:
*   pc = all technologies kept at endline
*   ld = all technologies at midline but not at endline
*   ed = not all technologies at midline
eststo clear
eststo pc: quietly estpost summarize `perception_vars' if a16_kall_i15_m==1
eststo ld: quietly estpost summarize `perception_vars' if a16_kall_i15_m==0 & a15_kall_i16==1
eststo ed: quietly estpost summarize `perception_vars' if a15_kall_i16==0

esttab pc ld ed using tablex_x.rtf, ///
	cells("mean(fmt(2))") mtitle("Perfect compliers" "Late disadopters" "Early disadopters") ///
	label nodepvar varwidth(35) modelwidth(6) replace ///
	title (Table x. Perceptions of Technologies by Household Types)

 
***********************************************************************************************************************


*/

